REM Copyright (c) Barry Kauler January 2013. bkhome.org
REM rewrote find_cat.c in BaCon. Reads categories.dat.
REM passed params: <packagegenericname> "<description>"
REM  ...return on stdout, found category.
REM or, no passed param(s), stdin is a pup-db pkg entry, empty DB_category field.
REM  ...return on stdout, db-entry, with category inserted in 5th field.
REM  ...can stdin a complete file, ex: cat Packages-ubuntu-precise-main | find_cat
REM or, one passed param, name of db-entries file. ex: find_cat Packages-ubuntu-precise-main
REM 130126 first release.
REM bacon 1.0.29 retains quote chars in ARGUMENT$ (refer: http://basic-converter.proboards.com/index.cgi?board=bugs&action=display&thread=422)

GLOBAL categories$ ASSOC STRING
GLOBAL keywords$ ASSOC STRING

REM Load /usr/local/petget/categories.dat into the two lookup tables.
REM Every useful line has the form NAME="value". Blank lines and lines
REM starting with '#' are ignored. Names starting with 'K' (KEYWDS_...) are
REM stored in keywords$, all other names (ex: PKGCAT_...) in categories$.
OPEN "/usr/local/petget/categories.dat" FOR READING AS handle1
WHILE NOT(ENDFILE(handle1)) DO
 READLN line$ FROM handle1
 chopped$=CHOP$(line$)
 IF EQUAL(chopped$,"") THEN CONTINUE
 first$=LEFT$(chopped$,1)
 IF EQUAL(first$,"#") THEN CONTINUE
 REM ex: chopped$=KEYWDS_Graphic_viewer=" image_viewer thumbnail thumbnails "
 REM ex: chopped$=PKGCAT_Help_Sub=" linux-faqs linux-howtos man-pages "
 SPLIT chopped$ BY "=" TO pair$ SIZE npair
 REM a line without '=' has no value part; skip it rather than index past
 REM the end of the split array.
 IF npair<2 THEN CONTINUE
 REM only the quote chars are chopped off; the spaces just inside the quotes
 REM stay, the later searches use them as word delimiters.
 value$=CHOP$(pair$[1],"'\"")
 IF EQUAL(first$,"K") THEN
  keywords$(pair$[0])=value$
 ELSE
  categories$(pair$[0])=value$
 END IF
WEND
CLOSE FILE handle1

REM description accumulator, starts as one space so that every word appended
REM later ends up with a space on both sides.
description0$=" "
REM bacon 1.0.29 keeps the double-quote chars in ARGUMENT$, strip them all
REM before cutting the commandline into words...
cleanargs$=REPLACE$(ARGUMENT$, CHR$(34), "")
SPLIT cleanargs$ BY " " TO commandline$ SIZE numparams

REM exactly two words: commandline$[1] is the name of a file with pup-db
REM entries. Exit with status 2 when that file does not exist.
IF numparams==2 THEN
 IF NOT(FILEEXISTS(commandline$[1])) THEN
  END 2
 END IF
 OPEN commandline$[1] FOR READING AS handle2
END IF

LABEL naughtyjump
REM Fetch the next db-entry into onedb$: from the file opened as handle2 when
REM a filename was passed (numparams==2), from stdin when numparams==1.
REM ex: bbe_0.2.2-1|bbe|0.2.2-1||BuildingBlock|136K|pool/universe/b/bbe|bbe_0.2.2-1_i386.deb|+libc6|sed-like editor for binary files|ubuntu|precise||
IF numparams==2 THEN
 REM file exhausted: close it and finish with status 0.
 IF ENDFILE(handle2) THEN
  CLOSE FILE handle2
  END 0
 END IF
 READLN onedb$ FROM handle2
ELIF numparams==1 THEN
 INPUT onedb$
END IF

REM Work out packagename$ and description$ (both lower-case) for the search.
IF numparams>=3 THEN
 REM passed on the commandline: 1st param is the name, all remaining words
 REM are appended to description0$, each followed by a space.
 packagename$=LCASE$(commandline$[1])
 FOR zz=2 TO numparams-1
  description0$=CONCAT$(description0$,commandline$[zz]," ")
 NEXT
 description$=LCASE$(description0$)
ELSE
 REM db-entry mode: cut onedb$ into its '|'-separated fields.
 flagdeb=0
 SPLIT onedb$ BY "|" TO dbarray$ SIZE dim9
 REM fewer than 11 fields (this includes an empty line): stop with status 1.
 IF dim9<11 THEN END 1
 REM default: the name is taken from the 2nd field...
 packagename$=LCASE$(dbarray$[1])
 IF NOT(EQUAL(dbarray$[6],"")) THEN
  REM 7th field is not empty, flag debian/ubuntu/raspbian entries (11th field)...
  IF EQUAL(dbarray$[10],"ubuntu") OR EQUAL(dbarray$[10],"debian") OR EQUAL(dbarray$[10],"raspbian") THEN
   flagdeb=1
  END IF
  IF flagdeb==1 THEN
   REM genericname is whatever follows the last '/' of the 7th field.
   REM INSTRREV gives 0 when there is no '/', RIGHT$ then returns the whole field.
   slashpos=INSTRREV(dbarray$[6],"/")
   tail$=RIGHT$(dbarray$[6],LEN(dbarray$[6])-slashpos)
   packagename$=LCASE$(tail$)
  END IF
 END IF
 REM description is the 10th field, padded with a space on each side.
 description0$=CONCAT$(" ",dbarray$[9]," ")
 description$=LCASE$(description0$)
END IF


REM Look for the package name in the package lists read from categories.dat.
REM name$ is packagename$ with a space on each side, so INSTR can only hit a
REM complete, space-delimited entry of a list.
DB_category$=""
name$=CONCAT$(" ",packagename$," ")
LOOKUP categories$ TO catkeys$ SIZE ncats
FOR ci=0 TO ncats-1
 REM ex: catkeys$[ci]=PKGCAT_Desktop_appearance
 IF INSTR(categories$(catkeys$[ci]),name$)==0 THEN CONTINUE
 SPLIT catkeys$[ci] BY "_" TO parts$ SIZE nparts
 REM need at least PREFIX_Category_Subcategory, else keep looking.
 IF nparts<3 THEN CONTINUE
 IF EQUAL(parts$[2],"Sub") THEN
  REM "Sub" means no sub-category. ex: DB_category$=Desktop
  DB_category$=parts$[1]
 ELSE
  REM ex: DB_category$=Desktop;appearance
  DB_category$=CONCAT$(parts$[1],";",parts$[2])
 END IF
 REM first usable match wins.
 BREAK
NEXT

REM no category yet: a name that begins with "lib" is taken to be a library.
REM (name$ starts with a space, so " lib" can only match at the start of the name.)
IF EQUAL(DB_category$,"") THEN
 IF INSTR(name$," lib")<>0 THEN
  DB_category$="BuildingBlock"
 END IF
END IF

REM Still no category: look for the keywords from categories.dat in the
REM description. description$ is lower-case with a space at each end, and each
REM keyword is searched with a space on both sides, so only whole words match.
REM The first keyword category that matches wins, same as the name search.
IF EQUAL(DB_category$,"") THEN
 LOOKUP keywords$ TO akeycat$ SIZE dim1
 FOR y=0 TO dim1-1
  REM ex: akeycat$[y]=KEYWDS_Desktop_appearance
  SPLIT akeycat$[y] BY "_" TO keysplit$ SIZE dim2
  REM a key without category and sub-category parts cannot give a result,
  REM so there is no point in testing its keywords.
  IF dim2<3 THEN CONTINUE
  SPLIT keywords$(akeycat$[y]) BY " " TO keynames$ SIZE dim3
  IF dim3==0 THEN CONTINUE
  FOR z=0 TO dim3-1
   REM the keyword list starts and ends with a space, which gives empty
   REM elements here. An empty keyword would become a two-space pattern that
   REM matches an empty description or any double space, so skip it.
   IF EQUAL(keynames$[z],"") THEN CONTINUE
   REM some keywords are actually multiple words, with '_' delimiter...
   akeyname$=REPLACE$(keynames$[z],"_"," ")
   keynameptn$=CONCAT$(" ",akeyname$," ")
   IF INSTR(description$,keynameptn$)<>0 THEN
    IF EQUAL(keysplit$[2],"Sub") THEN
     REM no sub-category. ex: DB_category$=Desktop
     DB_category$=keysplit$[1]
    ELSE
     REM ex: DB_category$=Desktop;appearance
     DB_category$=CONCAT$(keysplit$[1],";",keysplit$[2])
    END IF
    BREAK
   END IF
  NEXT
  REM the BREAK above only leaves the inner loop; stop the outer loop too,
  REM otherwise a later category would overwrite the one just found.
  IF NOT(EQUAL(DB_category$,"")) THEN BREAK
 NEXT
END IF

REM fallback for ubuntu/debian/raspbian db-entries (never for commandline mode):
REM the 5th field of the incoming entry may hold a debian "Section" value,
REM map the ending of that value onto a category.
IF EQUAL(DB_category$,"") AND numparams<3 THEN
 IF flagdeb==1 THEN
  section$=dbarray$[4]
  IF NOT(EQUAL(section$,"")) THEN
   IF REGEX(section$,"vcs$")<>0 THEN
    DB_category$="Utility;development"
   ELIF REGEX(section$,"admin$")<>0 THEN
    DB_category$="Setup"
   ELIF REGEX(section$,"doc$")<>0 THEN
    DB_category$="Help"
   ELIF REGEX(section$,"games$")<>0 THEN
    DB_category$="Fun"
   ELIF REGEX(section$,"science$")<>0 THEN
    DB_category$="Personal;education"
   END IF
  END IF
 END IF
END IF

REM nothing matched anywhere: use the default category.
IF EQUAL(DB_category$,"") THEN
 DB_category$="BuildingBlock"
END IF

IF numparams>=3 THEN
 REM name/description came from the commandline: print just the category.
 PRINT DB_category$
ELSE
 REM db-entry came in via stdin or file: put the found category in the 5th
 REM field and send the entry out on stdout, every field followed by '|'.
 REM The last element of the split (index dim9-1) is not printed.
 dbarray$[4]=DB_category$
 outline$=""
 FOR yy=0 TO dim9-2
  outline$=CONCAT$(outline$,dbarray$[yy],"|")
 NEXT
 PRINT outline$
 REM go back for the next db-entry.
 GOTO naughtyjump
END IF


